
###############################################################################
### Did 3G Make Afghan Insurgents Fight More Effectively? 
### Replication files for analyses
### Mehmet Erdem Arslan
###
### Appendix C: 
### Test of Potential Reporting Bias in GTD Between 2G and 3G Network Coverage
###############################################################################

# The test of potential reporting bias implemented here is a modified version of the test in Weidmann (2016)


# setwd("your file path here")

library(sf)
library(spatialreg)
library(ggplot2)
library(grid)
library(gridExtra)

load("R/Reporting_bias_diagnostics.Rdata")

# 1776 events coded in GTD in Afghanistan in the year 2018
# 4 events does not have coordinates recorded
# 127 events have "nkill" (number of killed) variable recorded as NA 
# remaining 1645 events stored in "gtd" dataframe 

# w: neighbour list for spatial analyses

# for this test:
# 1645 events, 50% each, slide by 20 events in each of the 42 datasets in total

# afg_adm dataset:
# id: District (Admin2) id 
# gtd...: gtd event data subsets with increasing severity 
# two: proportion of the area covered by 2G network
# three: proportion of the area covered by 3G network
# Wtwo: spatial lag for 2G network coverage
# Wthree: spatial lag for 3G network coverage
# troops: troop presence (binary)

# mean severity for each subset
severity <- c(0)
for (i in (0:41)) {
  sub <- gtd[(1 + i*20):((nrow(gtd)/2) + i*20), ]
  severity[i + 1] <- mean(sub$nkill, na.rm=TRUE)
}
remove(i, sub)

# estimating the impact of 2G and 3G for each subset 
out <- data.frame("two" = c(0), "two.se" = c(0), "three" = c(0), "three.se" = c(0))
for (i in (1:42)){
  formula <- paste0(paste0("gtd.", i), " ~ two + three")
  md <-  sacsarlm(formula = formula, data = afg_adm, 
                  listw = w, Durbin = TRUE, zero.policy = TRUE)
  res <- c(md$coefficients[2], md$rest.se[2], md$coefficients[3], md$rest.se[3])
  out <- rbind(out, res)
  remove(res)
}
out <- out[-1,]
out$rowid <- c(1:nrow(out))
out$severity <- severity

#### Figure 5: Testing potential reporting bias, adapted from Weidmann (2016) ####

plot1 <- ggplot(out) + 
  geom_linerange(mapping = aes(rowid, two, ymin = two - 1.96*two.se, ymax = two + 1.96*two.se), color = "red", alpha = 0.6) +
  geom_linerange(mapping = aes(rowid, three, ymin = three - 1.96*three.se, ymax = three + 1.96*three.se), color = "blue", alpha = 0.6) + 
  theme(plot.margin = unit(c(1,1,0,1), "cm")) + 
  scale_x_continuous(breaks=c(), labels=c()) + 
  geom_point(aes(rowid, two), color = "red") + 
  geom_point(aes(rowid, three), color = "blue") + 
  geom_hline(yintercept = 0, linetype = "dotted") + 
  ylab("Estimated coefficients for 2G and 3G") + 
  xlab("") + 
  theme(axis.text.y = element_text(angle = 90, hjust = 0.5)) + theme_bw()

plot2 <- ggplot(out, aes(rowid, severity)) + 
  geom_bar(stat = "identity", fill="grey") + 
  scale_x_continuous(breaks=c(), labels=c()) + 
  theme(axis.text.y = element_text(angle = 90, hjust = 0.5)) + 
  scale_y_continuous("Mean severity of the subset") + 
  theme_bw() + xlab("Subsets with increasing severity")

pdf("Outputs/Figure5.pdf", height=7, width=10)
grid.arrange(plot1, plot2, ncol=1)
dev.off()


rm(list = ls())

### end of script ###